#VERSION INFORMATION
#RESULTS COMPUTED WITH R 3.0.0.
#VERSION 1.01 OF THE 'TSA' LIBRARY
#VERSION 0.10-31 OF THE 'tseries' LIBRARY
#VERSION 1.5-9.1 OF THE 'locfit' LIBRARY
#VERSION 1.7-22. OF THE 'mgcv' LIBRARY
#In the unlikely event that results are not exactly reproduced with the code in this file, you may try the file versions listed above.

#libraries
rm(list=ls())
library(TSA)
library(foreign)
options(scipen=8)

###set to YOUR working directory
setwd('/Volumes/MONOGAN/CONSULTING/japanEnergy/')

#load data 
data <- read.dta('comprehensiveJapanEnergy.dta')
#data <- read.csv('comprehensiveJapanEnergy.csv')
summary(data)

#create a pulse intervention variable
data$pulse<-as.numeric(data$time==39)

#### MODELING ALL ENERGY USE ###
#create logarithm of usage
data$lall<-log(data$all)

#plots of original and logged series
plot(y=data$all, x=data$time, type='l')
plot(y=data$lall, x=data$time, type='l')

#identify arima process
acf(data$lall,26)
pacf(data$lall,26)

#Estimate ARMA(1,1), SARMA(1,0) model. Clear 12-month seasonality
mod.all.arma <- arima(data$lall, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12))
mod.all.arma
#AIC=-212.45

#Alternate model: ARMA(2,0), SARMA(1,0) model. Clear 12-month seasonality
mod.all.alt <- arima(data$lall, order=c(2,0,0), seasonal=list(order=c(1,0,0), period=12))
mod.all.alt
#AIC=-211.4

#diagnose ARMA(1,1), SARMA(1,0) model
acf(mod.all.arma$residuals,26)
pacf(mod.all.arma$residuals,26)
Box.test(mod.all.arma$residuals,26,"Ljung-Box")

#estimate intervention analysis for March 2011 (start with a pulse)
all.pulse <- arimax(data$lall, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$pulse, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); all.pulse
#AIC=-222.54

#our coefficient for the decay term is practically 1, let's try the step function
all.step <- arimax(data$lall, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(0,0))); all.step
#AIC=-224.45

#Any buildup on the step?
all.buildup <- arimax(data$lall, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); all.buildup
#AIC=-222.99
#Step only has the best fit. Also, a negative buildup term on the third model is hard to believe.

#calculate z-ratios
all.step$coef
sqrt(diag(all.step$var.coef))
all.step$coef/sqrt(diag(all.step$var.coef)) #z-ratios

#Ljung-Box test on final model
Box.test(all.step$residuals,26,"Ljung-Box")

#Graph the intervention model
png('allIntervention.png',width=4, height=4, units='in',pointsize=10, res=500)
y.pred <- 18.4030088557-0.0185984299*mean(data$temp)+0.0006404635*(mean(data$temp)^2)-0.0896279447*data$dummy
plot(y=data$lall, x=data$time, type='l',xlab="Months",ylab="Logged Usage (MWH)", axes=F)
lines(y=y.pred, x=data$time, lty=3, col='red',lwd=2)
axis(1,at=c(1,13,25,37,49),labels=c("Jan 2008","Jan 2009","Jan 2010","Jan 2011","Jan 2012"))
axis(2)
axis(4,at=c(log(75000000),log(80000000),log(85000000),log(90000000),log(95000000)),labels=c(75000000,80000000,85000000,90000000,95000000))
box()
dev.off()

#### MODELING TEPCO USE ###
#create logarithm of usage
data$ltepco<-log(data$tepco)

#plots of original and logged series
plot(y=data$tepco, x=data$time, type='l')
plot(y=data$ltepco, x=data$time, type='l')

#identify arima process
acf(data$ltepco,26)
pacf(data$ltepco,26)

#Estimate ARMA(1,1), SARMA(1,0) model. Clear 12-month seasonality
mod.tepco.arma <- arima(data$ltepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12))
mod.tepco.arma

#diagnose arima model
acf(mod.tepco.arma$residuals,26)
pacf(mod.tepco.arma$residuals,26)
Box.test(mod.tepco.arma$residuals,26,"Ljung-Box")

#estimate intervention analysis for March 2011 (start with a pulse)
tepco.pulse <- arimax(data$ltepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$pulse, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); tepco.pulse
#AIC=-197.7

#our coefficient for the decay term is practically 1, let's try the step function
tepco.step <- arimax(data$ltepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(0,0))); tepco.step
#AIC=-198.72

#Any buildup on the step?
tepco.buildup <- arimax(data$ltepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); tepco.buildup
#AIC=-196.88
#Step only has the best fit. Also, a negative buildup term on the third model is hard to believe.

#calculate z-ratios
tepco.step$coef
sqrt(diag(tepco.step$var.coef))
tepco.step$coef/sqrt(diag(tepco.step$var.coef)) #z-ratios

#Ljung-Box test on final model
Box.test(tepco.step$residuals,26,"Ljung-Box")

#Graph the intervention model
png('tepcoIntervention.png',width=4, height=4, units='in',pointsize=10, res=500)
y.pred <- 17.243355487-0.033559907*mean(data$temp)+0.001032312*(mean(data$temp)^2)-0.128327231*data$dummy
plot(y=data$ltepco, x=data$time, type='l',xlab="Months",ylab="Logged Usage (MWh)", axes=F)
lines(y=y.pred, x=data$time, lty=3, col='red',lwd=2)
axis(1,at=c(1,13,25,37,49),labels=c("Jan 2008","Jan 2009","Jan 2010","Jan 2011","Jan 2012"))
axis(2)
axis(4,at=c(log(20000000),log(22000000),log(24000000),log(26000000),log(28000000)),labels=c(20000000,22000000,24000000,26000000,28000000))
box()
dev.off()

#### MODELING KEPCO USE ###
#create logarithm of usage
data$lkepco<-log(data$kepco)

#plots of original and logged series
plot(y=data$kepco, x=data$time, type='l')
plot(y=data$lkepco, x=data$time, type='l')

#identify arima process
acf(data$lkepco,26)
pacf(data$lkepco,26)

#Estimate ARMA(1,1), SARMA(1,0) model. Clear 12-month seasonality
mod.kepco.arma <- arima(data$lkepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12))
mod.kepco.arma

#diagnose arima model
acf(mod.kepco.arma$residuals,26)
pacf(mod.kepco.arma$residuals,26)
Box.test(mod.kepco.arma$residuals,26,"Ljung-Box") #p=.02, but we'll proceed under the theory that the DGP is consistent for all series, then rediagnose the whole model.

#estimate intervention analysis for March 2011 (start with a pulse)
kepco.pulse <- arimax(data$lkepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$pulse, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); kepco.pulse
#AIC=-191.91, and convergence issues.

#Try the step, as the pulse was problematic
kepco.step <- arimax(data$lkepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(0,0))); kepco.step
#AIC=-195.58

#Any buildup on the step?
kepco.buildup <- arimax(data$lkepco, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); kepco.buildup
#AIC=-194.21
#Step only has the best fit. Also, a negative buildup term on the third model is hard to believe.

#calculate z-ratios
kepco.step$coef
sqrt(diag(kepco.step$var.coef))
kepco.step$coef/sqrt(diag(kepco.step$var.coef)) #z-ratios

#Ljung-Box test on final model
Box.test(kepco.step$residuals,26,"Ljung-Box")

#Graph the intervention model
png('kepcoIntervention.png',width=4, height=4, units='in',pointsize=10, res=500)
y.pred <- 16.4606867770-0.0244827278*mean(data$temp)+0.0008433583*(mean(data$temp)^2)-0.0492757156*data$dummy
plot(y=data$lkepco, x=data$time, type='l',xlab="Months",ylab="Logged Usage (MWh)", axes=F)
lines(y=y.pred, x=data$time, lty=3, col='red',lwd=2)
axis(1,at=c(1,13,25,37,49),labels=c("Jan 2008","Jan 2009","Jan 2010","Jan 2011","Jan 2012"))
axis(2)
axis(4,at=c(log(11000000),log(12000000),log(13000000),log(14000000),log(15000000)),labels=c(11000000,12000000,13000000,14000000,15000000))
box()
dev.off()

#### MODELING LARGE USE ###
#create logarithm of usage
data$llarge<-log(data$large)

#plots of original and logged series
plot(y=data$large, x=data$time, type='l')
plot(y=data$llarge, x=data$time, type='l')

#identify arima process
acf(data$llarge,26)
pacf(data$llarge,26)

#Estimate ARMA(1,1), SARMA(1,0) model. Seasonality isn't as apparent, but assuming the DGP is similar.
mod.large.arma <- arima(data$llarge, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12))
mod.large.arma

#diagnose arima model
acf(mod.large.arma$residuals,26)
pacf(mod.large.arma$residuals,26)
Box.test(mod.large.arma$residuals,26,"Ljung-Box")

#estimate intervention analysis for March 2011 (start with a pulse)
large.pulse <- arimax(data$llarge, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$pulse, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); large.pulse
#AIC=-247.49

#reasonable decay term that's less than 1, but try the step anyway
large.step <- arimax(data$llarge, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(0,0))); large.step
#AIC=-247.97
#comparable fit

#Any buildup on the step?
large.buildup <- arimax(data$llarge, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); large.buildup
#AIC=-246.72
#Slightly worse fit, and accumulation term is nondiscernible.

#entertain the idea of a compound model
large.compound <- arimax(data$llarge, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=cbind(data$pulse,data$dummy), xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0),c(0,0))); large.compound
#Neither coefficient is individually significant due to collinearity, but best fit at AIC=-245.73

#joint significance test of all terms
mod.notransf<-arimax(data$llarge, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xreg=as.data.frame(cbind(data$temp,data$temp2)))
-2*mod.notransf$loglik+2*large.compound$loglik
1-pchisq(-2*mod.notransf$loglik+2*large.compound$loglik,df=3)
#likelihood ratio test: 13.12988, 3 df, p=0.00436
#coefficients are jointly significant

#calculate z-ratios
large.compound$coef
sqrt(diag(large.compound$var.coef))
large.compound$coef/sqrt(diag(large.compound$var.coef)) #z-ratios

#Ljung-Box test on final model
Box.test(large.compound$residuals,26,"Ljung-Box")

#Graph the intervention model
png('largeIntervention.png',width=4, height=4, units='in',pointsize=10, res=500)
y.pred <- 17.3815928892-0.0077405431*mean(data$temp)+0.0003034385*(mean(data$temp)^2) -0.0576210093*(0.5605124134^(data$time-39))*data$dummy-0.0301667076*data$dummy
plot(y=data$llarge, x=data$time, type='l',xlab="Months",ylab="Logged Usage (MWh)", axes=F)
lines(y=y.pred, x=data$time, lty=3, col='red',lwd=2)
axis(1,at=c(1,13,25,37,49),labels=c("Jan 2008","Jan 2009","Jan 2010","Jan 2011","Jan 2012"))
axis(2)
axis(4,at=c(log(28000000),log(30000000),log(32000000),log(34000000),log(36000000),log(38000000)),labels=c(28000000,30000000,32000000,34000000,36000000,38000000))
box()
dev.off()

#### MODELING HOUSEHOLD USE ###
#create logarithm of usage
data$lhouse<-log(data$house)

#plots of original and logged series
plot(y=data$house, x=data$time, type='l')
plot(y=data$lhouse, x=data$time, type='l')

#identify arima process
acf(data$lhouse,26)
pacf(data$lhouse,26)

#Estimate ARMA(1,1), SARMA(1,0) model. Clear 12-month seasonality
mod.house.arma <- arima(data$lhouse, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12))
mod.house.arma

#diagnose arima model
acf(mod.house.arma$residuals,26)
pacf(mod.house.arma$residuals,26)
Box.test(mod.house.arma$residuals,26,"Ljung-Box")

#estimate intervention analysis for March 2011 (start with a pulse)
house.pulse <- arimax(data$lhouse, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$pulse, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); house.pulse
#AIC=-144.56

#our coefficient for the decay term is bigger than 1, let's try the step function
house.step <- arimax(data$lhouse, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(0,0))); house.step
#AIC=-141.64

#Any buildup on the step?
house.buildup <- arimax(data$lhouse, order=c(1,0,1), seasonal=list(order=c(1,0,0), period=12), xtransf=data$dummy, xreg=as.data.frame(cbind(data$temp,data$temp2)), transfer=list(c(1,0))); house.buildup
#AIC=-144.55
#Buildup and pulse have similar fit, but pulse had an implausible decay term.

#calculate z-ratios
house.buildup$coef
sqrt(diag(house.buildup$var.coef))
house.buildup$coef/sqrt(diag(house.buildup$var.coef)) #z-ratios

#Ljung-Box test on final model
Box.test(house.buildup$residuals,26,"Ljung-Box")

#Graph the intervention model
png('houseIntervention.png',width=4, height=4, units='in',pointsize=10, res=500)
cumulation<-rep(0,60)
for (i in 40:60){cumulation[i]<-cumulation[i-1]-0.02789959408*(0.72178341473^(data$time[i]-39))}
y.pred <- 17.16390220469-0.05116213297*mean(data$temp)+0.00139703294*(mean(data$temp)^2)-0.02789959408*data$dummy+cumulation
plot(y=data$lhouse, x=data$time, type='l',xlab="Months",ylab="Logged Usage (MWh)", axes=F)
lines(y=y.pred, x=data$time, lty=3, col='red',lwd=2)
axis(1,at=c(1,13,25,37,49),labels=c("Jan 2008","Jan 2009","Jan 2010","Jan 2011","Jan 2012"))
axis(2)
axis(4,at=c(log(14000000),log(16000000),log(18000000),log(20000000),log(22000000),log(24000000),log(26000000)),labels=c(14000000,16000000,18000000,20000000,22000000,24000000,26000000))
box()
dev.off()

